package com.landmark;

import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Enumeration;
import java.util.Hashtable;
import javax.sound.midi.SysexMessage;
import javax.swing.JTextArea;
import javax.swing.SwingWorker;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.nodes.TextNode;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class MotengCat extends SwingWorker<Boolean, Boolean> {

    FileWriter fw = null;
    FileWriter errorFile = null;
    int pageNo;
    StringBuffer sb = new StringBuffer();
    int count = 0;
    Hashtable<String, String> htModelLinks = new Hashtable<String, String>();
    String status = "";
    JTextArea txtArea;
    StringBuilder sbUrlData = new StringBuilder();

    @Override
    protected Boolean doInBackground() throws Exception {
        start();
        return true;
    }

    @Override
    protected void done() {
        Util.taskCompleteMsg(status);
    }

    public void start() {
        try {
            txtArea = Util.createOutputFile("Moteng");
            HelperBean helperBean = Util.getHelperBean();

            fw = new FileWriter(helperBean.getOutputFile());
            errorFile = new FileWriter(helperBean.getErrorFile());

            // Here we get all the search pages from page 0 to 9 which we know by seeing the site
            for (pageNo = 1; pageNo <= 974; pageNo++) {
                if (Util.getHelperBean().isTerminateCurrentProcess()) {
                    break;
                }
                String str = "http://www.moteng.com/cgi-bin/main/co_disp/displ/curpage/" + pageNo + "/carfnbr/945/scpl/1";
                txtArea.setText(str);
                txtArea.paintImmediately(txtArea.getVisibleRect());
                processCategoryPage(str);
            }

            txtArea.append("\nFiles are located at : \n" + helperBean.getOutputFile().getAbsolutePath() + "\n" + helperBean.getErrorFile().getAbsolutePath() + "\n");
            txtArea.append("Total products : " + htModelLinks.size() + "\n");
            txtArea.paintImmediately(txtArea.getVisibleRect());

            Enumeration<String> keys = htModelLinks.keys();

            iterateCollection(htModelLinks);


            if (fw != null) {
                fw.close();
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (fw != null) {
                    fw.close();
                }
            } catch (Exception e1) {
                // TODO: handle exception
            }
        }
        status = "Done processing.";
        if (Util.getHelperBean().isTerminateCurrentProcess()) {
            status = "Process Terminated";
        }
        txtArea.append("\n" + status);
    }

    /**
     *
     * @param htLinks
     */
    private void iterateCollection(Hashtable<String, String> htLinks) {
        try {
            Enumeration<String> keys = htLinks.keys();
            while (keys.hasMoreElements()) {
                if (Util.getHelperBean().isTerminateCurrentProcess()) {
                    break;
                }
                String key = (String) keys.nextElement();
                txtArea.append("Processing page: " + key + "\n");
                txtArea.paintImmediately(txtArea.getVisibleRect());
                sb = new StringBuffer();
                processModels(htLinks.get(key).toString());
                fw.write(sb.toString().replaceAll("\n", "").replaceAll("\r", "").replaceAll("    ", "").replace("<tr>", "\n"));
                fw.write("\n");
                fw.flush();
            }
        } catch (Exception e) {
        }
    }

    /**
     *
     * @param aUrl
     */
    private void processModels(String aUrl) {
        try {
            Parser parser = Util.getParserFromUrl(aUrl, null);
            fw.write(aUrl + "\n");
            Document doc = Jsoup.connect(aUrl).get();
            Elements elements = doc.select("#placeholder");
            System.out.println("$$$$$    :" + elements.size());
            for (Element element : elements) {
                String absoluteUrl = element.attr("src");
                Util.saveImage(absoluteUrl.replace("_full_", "_large_"), ".", null);
            }

            NodeList tableNodes = parser.extractAllNodesThatMatch(new HasAttributeFilter("class", "prod_desc2"));

            Node targetTableNode = tableNodes.elementAt(0);// retrives the table content where the description starts which we are intersetd in.			

            iterateTag(targetTableNode);

        } catch (Exception e) {
            try {
                errorFile.write(aUrl + " \n" + e.getMessage() + " \n");
                errorFile.flush();
            } catch (Exception e1) {
            }
        }
    }

    /**
     *
     * @param aNodeList
     */
    private void iterateTag(Node aNodeList) {
        if (aNodeList instanceof TextNode) {
            TextNode text = (TextNode) aNodeList;
            sb.append(text.getText());
        } else if (aNodeList instanceof TagNode) {
            TagNode tag = (TagNode) aNodeList;
            if (tag.getRawTagName().equalsIgnoreCase("td")) {
                sb.append(",");
            }
            // process recursively (nodes within nodes) via getChildren()				
            NodeList nl = tag.getChildren();
            if (null != nl) {
                try {
                    for (NodeIterator i = nl.elements(); i.hasMoreNodes();) {
                        iterateTag(i.nextNode());
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    private void processCategoryPage(String aUrl) {
        try {
            URL oracle = new URL(aUrl);
            sbUrlData = new StringBuilder();
            URLConnection yc = oracle.openConnection();

            Document doc = Jsoup.parse(yc.getInputStream(), null, aUrl);
            Elements elements = doc.select("div.pr_pic").select("a[href]");

            for (Element element : elements) {

                String absoluteUrl = element.attr("abs:href");

                htModelLinks.put(absoluteUrl, absoluteUrl);
            }
        } catch (Exception e) {
            try {
                errorFile.write(aUrl + " \n" + e.getMessage() + " \n");
                errorFile.flush();
                e.printStackTrace();
            } catch (Exception e1) {
            }
        }
    }
}
